#
# Analyze.R
#
# Analyze results from Learning Together Slowly
# main experiment. Seth J. Hill.
#

rm(list=ls())
if (.Platform$OS == "windows") {
  # Set working directory in location of script.
  .doit <- function() { # only works with R.exe; trap errors if using Rscript.exe
  f_f <- lapply(sys.frames(),function(x) x$ofile);f_f <- Filter(Negate(is.null),f_f) ; PTH <- dirname(f_f[[length(f_f)]]); setwd(PTH) ; rm(PTH,f_f)
  }
  try(.doit(),silent=T)
}
library(bit64)
library(data.table)
options(stringsAsFactors=F)
library(RColorBrewer)
palette(brewer.pal(n=9,name="Set1")[c(1:5,7:9)])
library(car)

#
# Which weights to use?
# "" - unweighted.
# "-Pew" - weights to Pew Polarization survey.
# Limit by IQ?
# "-iq-hi" - Pew weights, subjects in top half of IQ.
# "-iq-lo" - Pew weights, subjects in bottom half of IQ.
# 
#
.use <- c("","-Pew","-iq-hi","-iq-lo")
.use <- .use[2]

#
# Call in and recode data and other
# object necessary for analysis.
#
# What to do when calculating logit of responses
# 0 or 100? logit.cert.adjust is the adjust argument
# to car:::logit() function. When NA, sets responses
# to missing.
logit.cert.adjust <- .01
source("99_CallData.R")

#
# Demographic summaries of current sample.
#
cat(sprintf("The average quiz score was %1.1f, with a minimum of %1.0f and a maximum of %1.0f.\n",wide[,weighted.mean(quiz.score,w=weight,na.rm=T)],wide[,wtd.quantile(quiz.score,weights=weight,probs=0,na.rm=T)],wide[,wtd.quantile(quiz.score,weights=weight,probs=1,na.rm=T)]))

cat("Demographics:\nAverage age:",wide[,weighted.mean(age,w=weight,na.rm=T)],
"\nPct female:",wide[,prop.table(wtd.table(gender,weight,'table'))[1]],
"\nEducation:\n")
print(wide[,prop.table(wtd.table(educ2,weight,'table'))])
cat("\nPartisan distribution:\n")
print(wide[,prop.table(wtd.table(pid.summ,weight,'table'))])
cat("\nIdeology:\n")
print(wide[,prop.table(wtd.table(self.ideo,weight,'table'))])
cat("\nVoted 2012:\n")
print(wide[,prop.table(wtd.table(voted.12,weight,'table'))])
flush.console()

#
# =============================
# Analyze updating.
# =============================
#

#
# ====================
cat("\nBasic plots of updating.\n")
# ====================
#

if (.use == "") { # doesn't vary by weights.
  cat("\nMaking plot of perfect Bayesian updating vs
  observed, by round and overall.\n")
  flush.console()

  # Plot them.
  # Set first round "perfect Bayesian" to first round beliefs for plotting purposes.
  .plotPerfBayes <- function(long,main=NULL,lty=1) {
    if (long[,all(is.na(c(pr.true,perf.bayes.cert)))]) { 
      # If no y-observations.
      plot(0,1,type='n',ann=F,axes=F, main="(No responses)")
      return(NA)
    }
    long[,matplot(x=round.num,y=cbind(pr.true,perf.bayes.cert),lty=lty,lwd=c(4,3),col=c("black","gray"),type='b',pch=19,xlab="Round number and signal",ylab="Posterior probability true",main=main,axes=F)];axis(1);axis(2,las=2)
    # Horizontal line at 50.
    abline(h=50,lty=3,col='gray')
    # Indicate signal received along x axis.
    long[,axis(1,at=1:5,labels=ifelse(is.na(signal),"",signal),tick=F,line=1)]
  }

  pdf(file.path("tablesAndFigs", "PerfectBayesVsActualByRespondent.pdf"), width=7,heigh=9)
  par.old <- par(mfrow=c(3,1),mar=c(4.1,4.1,2.1,1.1),oma=c(0,0,2,0),cex.lab=1.2,cex.main=1.2)
  for (i in long[,sort(unique(resp.id))]) {
    # Contest 1.
    .plotPerfBayes(long[resp.id == i & stub == "contest1",],main="First contest",lty=1)
    # Contest 2.
    .plotPerfBayes(long[resp.id == i & stub == "contest2",],main="Second contest",lty=1)
    # Self confidence.
    .plotPerfBayes(long[resp.id == i & stub == "selfcon",],main="IQ contest",lty=1)
    title(main=paste("Respondent",i),outer=T,line=1)
  }
  par(par.old)
  dev.off()
}


#
# ====================
cat("\nTabulation of mean prior and posterior beliefs.\n")
# ====================
#
.identifyContests <- function(long,wide,fact) {
  # Create data.table of respondents and rounds
  # that identify which round-respondents answered
  # a specific fact question.
  # Arguments.
  #  long - the long data table of responses.
  #  wide - the wide data table of respondents.
  #  fact - the name of the fact.

  # Remove " (TRUE)" or " (FALSE)" from fact.
  fact <- gsub(" \\(TRUE\\)| \\(FALSE\\)","",fact)
  
  # Those asked this fact in first contest.
  contests <- CJ(resp.id=wide[fact_contest1 == fact,resp.id],round=c("contest11","contest15"))
  # rbind those asked this fact in second contest
  contests <- rbindlist(list(contests,
  CJ(resp.id=wide[fact_contest2 == fact,resp.id],round=c("contest21","contest25"))))
  
  # Merge respondent reported beliefs.
  contests <- merge(contests,long[,c("resp.id","round","pr.true","round.num","weight"),with=F],by=c("resp.id","round"),all.x=T,all.y=F)

  return(contests)
}

res <- data.table()
for (fact in facts) {
  # Calculate prior and posterior mean for all.
  contests <- .identifyContests(long,wide,fact)
  mean.all <- contests[,list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=round.num]
  # Calculate prior and posterior mean for Dems.
  contests <- .identifyContests(long,wide[pid.summ == "Democrat",],fact)
  mean.dem <- contests[,list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=round.num]
  # Calculate prior and posterior mean for Reps.
  contests <- .identifyContests(long,wide[pid.summ == "Republican",],fact)
  mean.rep <- contests[,list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=round.num]
  # Add to table.
  .roundIt <- function(x,rnd) {
    # Return rounded value from x for rnd.
    round(x[round.num == rnd,mu],1)
  }
  res <- rbindlist(list(res,data.table(Question=fact,  Prior.all=.roundIt(mean.all,1), Post.all=.roundIt(mean.all,5), Prior.dem=.roundIt(mean.dem,1), Post.dem=.roundIt(mean.dem,5), Prior.rep=.roundIt(mean.rep,1), Post.rep=.roundIt(mean.rep,5))))
}

# Create the table.
setkey(res,Prior.dem) # sort on Dem prior.
.makeRow <- function(x) {
  # Make a latex row.
  paste(paste(x,collapse=" & "),"\\\\")
}
out <- c("\\\\","\\hline",
"Question & \\multicolumn{2}{c}{All respondent means} & \\multicolumn{2}{c}{Democrat means} & \\multicolumn{2}{c}{Republican means}  \\\\",
"\\hline",
"         & Prior & Posterior & Prior & Posterior & Prior & Posterior \\\\",
"\\hline","\\hline",
apply(res,1,.makeRow),
"\\hline")

# Add IQ results.
# Identify prior and posterior beliefs.
contests <- merge(long[round %in% c("selfcon1","selfcon5"),], wide[,c("resp.id","quizAboveBelow"),with=F], by="resp.id", all.x=T,all.y=F)
# Calculate averages, separately for those whose
# score was above or below the cutoff.
mean.all <- contests[,list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=c("round.num","quizAboveBelow")]
mean.dem <- contests[pid.summ == "Democrat", list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=c("round.num","quizAboveBelow")]
mean.rep <- contests[pid.summ == "Republican", list(mu=weighted.mean(pr.true,w=weight,na.rm=T)),by=c("round.num","quizAboveBelow")]
# Create data.table.
setkey(mean.all,quizAboveBelow)
setkey(mean.dem,quizAboveBelow)
setkey(mean.rep,quizAboveBelow)
# Those above.
ab <- "Above" # to query appropriate rows.
res <- 
data.table(Question="Your IQ quiz score is in the top half (respondents for which TRUE).",  Prior.all=.roundIt(mean.all[ab,],1), Post.all=.roundIt(mean.all[ab,],5), Prior.dem=.roundIt(mean.dem[ab,],1), Post.dem=.roundIt(mean.dem[ab,],5), Prior.rep=.roundIt(mean.rep[ab,],1), Post.rep=.roundIt(mean.rep[ab,],5))
# Those below.
ab <- "Below" # to query appropriate rows.
res <- rbindlist(list(res,
data.table(Question="Your IQ quiz score is in the top half (respondents for which FALSE).",  Prior.all=.roundIt(mean.all[ab,],1), Post.all=.roundIt(mean.all[ab,],5), Prior.dem=.roundIt(mean.dem[ab,],1), Post.dem=.roundIt(mean.dem[ab,],5), Prior.rep=.roundIt(mean.rep[ab,],1), Post.rep=.roundIt(mean.rep[ab,],5))))

# Add to the table.
out <- c(out,apply(res,1,.makeRow),"\\hline")

# Write it to screen and out.
cat(paste(out,collapse="\n "))
write(paste(out,collapse="\n "),file=file.path("tablesAndFigs",sprintf("Table1%s.tex",.use)))

#
# ====================
# Regression results at individual level for partisan
# contests. (for note in paper)
# ====================
#
.coefGrab <- function(logit.cert,logit.cert.prev,sig.comb) {
  # Return signal coefficient, SE, and p-value
  # on hypothesis sig.comb coef = 1 as vector.
  res <- lm(logit.cert ~ -1 + logit.cert.prev + sig.comb)
  # Check to make sure enough variation to estimate.
  if (summary(res)$sigma == 0 | is.na(coef(res)["sig.comb"])) {
    return(list(NA,NA,NA))
  }
  #print(summary(res))
  # Hypothesis test.
  library(car)
  r2 <- linearHypothesis(res,matrix(c(0,1),nrow=1),rhs=c(1))
  list("coef"=coef(res)["sig.comb"],"se"=sqrt(vcov(res)["sig.comb","sig.comb"]),"wald.p"=r2[["Pr(>F)"]][2])
}
# Test.
#res <- long[part.fact == 1 & resp.id == 415 & !is.na(logit.cert) & !is.na(logit.cert.prev) & !is.na(sig.comb),.coefGrab(logit.cert,logit.cert.prev,sig.comb)]
res <- data.table()
for (i in long[,sort(unique(resp.id))]) {
  r <- long[part.fact == 1 & resp.id == i & !is.na(logit.cert) & !is.na(logit.cert.prev) & !is.na(sig.comb),.coefGrab(logit.cert,logit.cert.prev,sig.comb)]
  if (!any(is.na(r))) {
    r[,resp.id := i]
    res <- rbindlist(list(res,r))
  }
}
res[,lt.1 := (coef < 1)]
res[,lt.1.and.ss := (lt.1 & wald.p/2 < .05)]
txt <- sprintf("Of %s subjects with enough variation in responses to estimate the model, %s (%1.1f percent) had a coefficient on the signal less than 1. Of those, %s were statistically significant from 1 in a one-tailed test.",nrow(res),res[,sum(lt.1)],100*res[,mean(lt.1)],res[,sum(lt.1.and.ss)])
cat(strwrap(txt),"\n")
write(txt,file=file.path("tablesAndFigs", "IndividualLevelCaution.txt"))
# Histogram of individual-level betas.
par.old <- par(mar=c(4.1,1.1,3.1,1.1))
res[,hist(coef,breaks=35,ann=F,axes=F)]
axis(1,at=seq(-3,3,1));title(xlab="Beta",main="Distribution of individual Bayes learning betas");abline(v=1,col=3,lwd=3)
par(par.old)
rm(r,res,txt)


#
# ===================
# Write out data to run regression
# models in Stata. See RegressionModels.do.
# ===================
#
cat("Writing out data for regression analysis in Stata.\n")

# Variables needed.
# Classify signal as consistent with first round beliefs.
long[,signal.cons.w.first := 1*( (pr.true.first.round > 50 & signal == "TRUE") | (pr.true.first.round < 50 & signal == "FALSE"))]
# Check.
long[,table(pr.true.first.round,signal,signal.cons.w.first,exclude=NULL)]

# Separate those who like politicans who compromise
# from those who do not.
wide[,like.compromise := 1*(substr(pol.compromise,30,40) == "make compro")]
wide[,table(pol.compromise,like.compromise)]
# Summary measure of ideology.
wide[,ideo2 := ifelse(regexpr("onservative",self.ideo ) != -1,"Con",ifelse(regexpr("iberal",self.ideo) != -1,"Lib","Mod"))]
wide[,table(self.ideo,ideo2)]
# Those who donated to or contacted politicians.
wide[,poli.contact.or.donate := 1*((substr(political.contact,1,2) == "Ye") | (substr(political.donation,1,2) == "Ye"))]

long <- merge(long,wide[,c("resp.id","like.compromise","ideo2","poli.contact.or.donate","voted.14p"),with=F],by="resp.id",all.x=T,all.y=F)

library(foreign)
write.dta(long[,c("did.not.update","logit.cert","logit.cert.prev","pr.true.first.round","signal.cons.w.first","sig.comb","sig.true","sig.fals","iq.round","resp.id","stub","round.num","fact.short2","pid.summ","like.compromise","ideo2","voted.14p","poli.contact.or.donate","weight"),with=F],file=sprintf("DataForStata%s.dta",.use),version=9)

#
# ===================
# Non-parametric test of tipping point
# vs Bayesian model. Aggreagate mean and
# median reversion to signals relative
# to previous signals received, and observe
# largest and smallest reversions.
# ===================
#

# Create data.table of lagged signals.
setkey(long,resp.id,round)
# Create table of lagged signals to round 3.
lagged.sigs <- long[round.num %in% c(2),
                list(lag.sig=toString(signal),round.num=3),
                by=c("resp.id","stub")]
# rbindlist table of lagged signals to round 4.
lagged.sigs <- rbindlist(list(lagged.sigs,
               long[round.num %in% c(2,3),
                list(lag.sig=toString(signal),round.num=4),
                by=c("resp.id","stub")]))
# rbindlist table of lagged signals to round 5.
lagged.sigs <- rbindlist(list(lagged.sigs,
               long[round.num %in% c(2,3,4),
                list(lag.sig=toString(signal),round.num=5),
                by=c("resp.id","stub")]))

# Merge long to lagged.sigs.
revs <- merge(
long[,c("resp.id", "stub", "round", "round.num", "signal", "pr.true", "pr.true.prev", "did.not.update", "iq.round", "weight"), with=F],
lagged.sigs,by=c("resp.id", "stub", "round.num"),all.x=T,all.y=F)

# Calculate belief revision.
revs[,revision := pr.true - pr.true.prev]
# Change TRUE and FALSE to T and F.
revs[,signal := gsub("TRUE","T",gsub("FALSE","F",signal))]
revs[,lag.sig := gsub("TRUE","T",gsub("FALSE","F",lag.sig))]
# Informative name for iq.round.
revs[,Type := ifelse(iq.round == 1,"IQ","Partisan")]

# Aggregate to the signal and lagged.signal for
# revisions round 2-5, among those who moved at
# least once.
r2 <- revs[round.num != 1 & did.not.update == 0,list(mean=weighted.mean(revision,w=weight,na.rm=T), median=weightedMedian(revision,w=weight,na.rm=T), count=.N),by=c("Type","round.num","lag.sig","signal")]
# Dropping missing/small-N responses.
r2 <- r2[!is.na(mean) & count > 5,]
r2[is.na(lag.sig),lag.sig := ""]
r2[,lag.sig := gsub(" ","",lag.sig)]
r2[,abs.mean := -1*abs(mean)]

# Write out tables.
cat("Writing out tables showing mean and median
revisions among those who updated at least once
in the contest by signal pattern. Evaluates tipping
point models.\n")

setkey(r2,abs.mean) # Sort on absolute mean revision
r2[,mean := round(mean,1)] # Round for printing.
r2[,median := round(median,1)] # Round for printing.
r2[,abs.mean := NULL]
r2p <- r2[Type == "Partisan",]
r2i <- r2[Type == "IQ",]
r2p[,Type := NULL];r2i[,Type := NULL]

# Partisan questions.
out <- c("\\\\","\\hline",
"Round  & Lagged  & Current & Mean     & Median  & Count \\\\",
"number & signals & signal  & revision & revision \\\\",
"\\hline","\\hline",
apply(r2p,1,.makeRow),
"\\hline")

# Write out.
#cat(paste(out,collapse="\n "))
write(paste(out,collapse="\n "),file=file.path("tablesAndFigs",sprintf("BeliefRevisionsBySignal-Partisan%s.tex",.use)))

# IQ questions.
out <- c("\\\\","\\hline",
"Round  & Lagged  & Current & Mean     & Median  & Count \\\\",
"number & signals & signal  & revision & revision \\\\",
"\\hline","\\hline",
apply(r2i,1,.makeRow),
"\\hline")

# Write out.
#cat(paste(out,collapse="\n "))
write(paste(out,collapse="\n "),file=file.path("tablesAndFigs",sprintf("BeliefRevisionsBySignal-IQ%s.tex",.use)))
